Rui Chen’s Human Retina Cell Atlas and fetal Human Retina Cell Atlas
And GTEx’s prototype single cell atlas.
I would have preferred to draw on an additional resource, but NR6A1 isn’t well detected in single cell RNA-seq (not shown). All of the resources used here are single nucleus RNA-seq, which captures the RNA from the nucleus of the cell.
Evidence of higher NR6A1 (in both fetal and adult retina datasets) in the horizontal cells (HC).
In the non-retina GTEx dataset, NR6A1 is a bit higher in epithelial cells. No evidence of NR6A1 in (adult) RPE, though.
snRNA h5ad retrieved from here and here on 2024-05-15.
The GTEx v8 data was downloaded on 2024-05-16.
The single nucleus data was aggregated (with the ADPBulk python package) to the cell type and sample level by summing the counts. This is also known as a “pseudobulk” process as it turns single cells into a data type with the rough properties of a bulk RNA-seq dataset.
“Pseudobulk” samples with low overall counts were removed; the cutoffs were chosen by hand-inspecting the distributions of overall expression.
The data was CPM scaled and log1p transformed for plotting.
# Activate the environment used to run the pseudobulk script.
mamba activate scvi1.0.4
# Each call passes: input h5ad, comma-separated grouping columns, output csv.gz.
# Fetal HRCA: grouped by donor, development stage and major cell class.
python src/adata_to_pseudobulk.py ~/data/chen_rca/chen_fetal_hrca_snRNA_88444d73-7f55-4a62-bcfe-e929878c6c78.h5ad donor_id,development_stage,majorclass data/chen_fetal_hrca.pb_raw.csv.gz
# Adult HRCA: grouped by donor and major cell class.
python src/adata_to_pseudobulk.py ~/data/chen_rca/chen_adult_hrca_snRNA_89f6a640-0537-4fd1-bdf9-540db9dd0b7d.h5ad donor_id,majorclass data/chen_adult_hrca.pb_raw.csv.gz
# GTEx: grouped by sample, tissue and broad cell type (spaces in column names
# are shell-escaped). Uses ~ like the calls above instead of a hard-coded
# /Users/<name> home directory.
python src/adata_to_pseudobulk.py ~/data/gtex/GTEx_8_tissues_snRNAseq_atlas_071421.public_obs.h5ad Sample\ ID,Tissue,Broad\ cell\ type data/gtex_v8_pb.csv.gz
library(tidyverse)
# Adult HRCA pseudobulk counts: the first column holds the pseudobulk sample
# label, the remaining columns are genes keyed by Ensembl ID.
pb <- data.table::fread("../data/chen_adult_hrca.pb_raw.csv.gz")
pb_mat <- pb[, 2:ncol(pb)] %>% as.matrix()
row.names(pb_mat) <- pb %>% pull(1)

# Drop low-depth pseudobulk samples (log1p of total counts must exceed 12).
# drop = FALSE keeps a matrix even if only a single sample passes the cutoff.
pb_mat <- pb_mat[log1p(rowSums(pb_mat)) > 12, , drop = FALSE]

# normalize_data() is given genes in rows, so transpose in and back out to
# return to samples-in-rows; log1p = TRUE requests the log1p transform.
pb_transform <- t(metamoRph::normalize_data(t(pb_mat), log1p = TRUE))

# Ensembl ID -> symbol / cytoband / name / Entrez lookup. Only the gene
# columns are used as keys: colnames(pb) also contains the sample-label
# column ("V1"), which is not an Ensembl ID and only yields an all-NA row.
conv_table <- AnnotationDbi::select(
  org.Hs.eg.db::org.Hs.eg.db,
  keys = grep("^ENSG", colnames(pb), value = TRUE),
  columns = c("ENSEMBL", "SYMBOL", "MAP", "GENENAME", "ENTREZID"),
  keytype = "ENSEMBL"
)
conv_table %>% head()
#> ENSEMBL SYMBOL MAP
#> 1 V1 <NA> <NA>
#> 2 ENSG00000000003 TSPAN6 Xq22.1
#> 3 ENSG00000000005 TNMD Xq22.1
#> 4 ENSG00000000419 DPM1 20q13.13
#> 5 ENSG00000000457 SCYL3 1q24.2
#> 6 ENSG00000000460 FIRRM 1q24.2
#> GENENAME ENTREZID
#> 1 <NA> <NA>
#> 2 tetraspanin 6 7105
#> 3 tenomodulin 64102
#> 4 dolichyl-phosphate mannosyltransferase subunit 1, catalytic 8813
#> 5 SCY1 like pseudokinase 3 57147
#> 6 FIGNL1 interacting regulator of recombination and mitosis 55732
# Reshape to long format: one row per (donor, cell class, gene).
# The row label is split at "-majorclass." into its donor and class parts and
# the "donor_id." prefix is stripped from the donor part.
pb_long <- pb_transform %>%
  as_tibble(rownames = 'info') %>%
  separate(info, c("donor","class"), sep = '-majorclass\\.') %>%
  mutate(donor = gsub("donor_id\\.","",donor)) %>%
  pivot_longer(cols = starts_with("ENSG"))

# Plot NR6A1 per cell class. The symbol lookup (one row per Ensembl ID, with
# multiple symbols collapsed) is reduced to NR6A1 *before* joining, so only
# the matching rows of the long table are kept instead of annotating every
# gene and filtering afterwards. The resulting rows are identical.
pb_long %>%
  inner_join(
    conv_table %>%
      group_by(ENSEMBL) %>%
      summarise(SYMBOL = paste(SYMBOL, collapse= ', ')) %>%
      filter(SYMBOL == 'NR6A1'),
    by = c("name" = "ENSEMBL")
  ) %>%
  ggplot(aes(x=class,y=(value))) +
  geom_boxplot() +
  ggbeeswarm::geom_quasirandom() +
  ylab("log1p(NR6A1)") +
  cowplot::theme_cowplot() +
  ggtitle("Pseudobulk Expression of NR6A1 in HRCA")
# Fetal HRCA pseudobulk counts: first column = pseudobulk sample label,
# remaining columns = genes.
pb_fetal <- data.table::fread("../data/chen_fetal_hrca.pb_raw.csv.gz")
pb_mat_fetal <- pb_fetal[, 2:ncol(pb_fetal)] %>% as.matrix()
row.names(pb_mat_fetal) <- pb_fetal %>% pull(1)

# Diagnostic used to pick the depth cutoff below:
# pb_mat_fetal %>% rowSums() %>% log1p() %>% density() %>% plot()

# Drop low-depth pseudobulk samples; drop = FALSE keeps a matrix even if only
# one sample survives.
pb_mat_fetal <- pb_mat_fetal[log1p(rowSums(pb_mat_fetal)) > 12, , drop = FALSE]
pb_transform_fetal <- t(metamoRph::normalize_data(t(pb_mat_fetal), log1p = TRUE))

# Long format, one row per (donor/stage label, cell class, gene).
# `age` is the number in front of the first ordinal suffix found in the label.
# All four English ordinal suffixes are accepted: matching only "th"/"st"
# would leave stages such as "22nd" or "23rd" with an NA age.
# NOTE(review): assumes the development-stage part of the label is written as
# an ordinal week ("19th week ...") -- confirm against the h5ad metadata.
pb_long_fetal <- pb_transform_fetal %>%
  as_tibble(rownames = 'info') %>%
  separate(info, c("donor","class"), sep = '-majorclass\\.') %>%
  mutate(age = str_extract(donor, '\\d+(?=st|nd|rd|th)') %>% as.integer()) %>%
  pivot_longer(cols = starts_with("ENSG"))
# NR6A1 rows of the fetal long table, computed once and reused by both plots.
# The symbol lookup (one row per Ensembl ID, multiple symbols collapsed) is
# filtered to NR6A1 before joining, which yields the same rows as annotating
# every gene and filtering afterwards.
nr6a1_fetal <- pb_long_fetal %>%
  inner_join(
    conv_table %>%
      group_by(ENSEMBL) %>%
      summarise(SYMBOL = paste(SYMBOL, collapse= ', ')) %>%
      filter(SYMBOL == 'NR6A1'),
    by = c("name" = "ENSEMBL")
  )

# Plot 1: expression per cell class, one facet per age.
nr6a1_fetal %>%
  ggplot(aes(x=class,y=(value))) +
  facet_wrap(~age) +
  geom_boxplot() +
  ggbeeswarm::geom_quasirandom() +
  ylab("log1p(NR6A1)") +
  cowplot::theme_cowplot() +
  ggtitle("Pseudobulk Expression of NR6A1 in fetal HRCA (facet by age(weeks))") +
  scale_x_discrete(guide = guide_axis(angle = 90))

# Plot 2: expression per cell class in a single panel, age mapped to colour.
nr6a1_fetal %>%
  ggplot(aes(x=class,y=(value), color = age)) +
  geom_boxplot() +
  ggbeeswarm::geom_quasirandom() +
  ylab("log1p(NR6A1)") +
  cowplot::theme_cowplot() +
  ggtitle("Pseudobulk Expression of NR6A1 in fetal HRCA") +
  scale_x_discrete(guide = guide_axis(angle = 90)) +
  labs(color='age(weeks)') +
  scale_color_viridis_c()
# GTEx atlas (also single nucleus RNA-seq) pseudobulk counts:
# first column = pseudobulk sample label, remaining columns = genes.
pb_gtex <- data.table::fread("../data/gtex_v8_pb.csv.gz")
pb_mat_gtex <- pb_gtex[, 2:ncol(pb_gtex)] %>% as.matrix()
row.names(pb_mat_gtex) <- pb_gtex %>% pull(1)

# Diagnostic used to pick the depth cutoff below:
# pb_mat_gtex %>% rowSums() %>% log1p() %>% density() %>% plot()

# Drop low-depth pseudobulk samples (a lower cutoff than for the retina data);
# drop = FALSE keeps a matrix even if only one sample survives.
pb_mat_gtex <- pb_mat_gtex[log1p(rowSums(pb_mat_gtex)) > 9, , drop = FALSE]
pb_transform_gtex <- t(metamoRph::normalize_data(t(pb_mat_gtex), log1p = TRUE))

# Long format with cell class and tissue parsed out of the sample label.
# The "." after each key is matched literally (it was an unescaped wildcard
# for the tissue pattern), and the lookbehind returns only the value part.
# NOTE(review): "\\w+" stops at the first non-word character, so a label such
# as "Epithelial cell (...)" or a tissue name containing a space is cut to its
# first word. Kept as-is because the plots were interpreted with these
# truncated labels -- confirm this coarsening is intended.
pb_long_gtex <- pb_transform_gtex %>%
  as_tibble(rownames = 'info') %>%
  mutate(class = str_extract(info, "(?<=Broad cell type\\.)\\w+"),
         tissue = str_extract(info, "(?<=Tissue\\.)\\w+")) %>%
  pivot_longer(cols = -c(info, class, tissue))
# GTEx plot 1: NR6A1 per cell class, points coloured by tissue.
# Gene columns in this table are matched by symbol, so no ID conversion
# is needed before filtering.
ggplot(
  data = dplyr::filter(pb_long_gtex, name == "NR6A1"),
  mapping = aes(x = class, y = value)
) +
  geom_boxplot() +
  ggbeeswarm::geom_quasirandom(mapping = aes(color = tissue)) +
  labs(
    y = "log1p(NR6A1)",
    title = "Pseudobulk Expression of NR6A1 in GTEx"
  ) +
  cowplot::theme_cowplot() +
  scale_color_manual(values = unname(pals::polychrome())) +
  scale_x_discrete(guide = guide_axis(angle = 90))

# GTEx plot 2: same data, one box per tissue/cell-class combination.
ggplot(
  data = dplyr::filter(pb_long_gtex, name == "NR6A1"),
  mapping = aes(x = interaction(tissue, class), y = value)
) +
  geom_boxplot() +
  ggbeeswarm::geom_quasirandom(mapping = aes(color = tissue)) +
  labs(
    y = "log1p(NR6A1)",
    title = "Pseudobulk Expression of NR6A1 in GTEx"
  ) +
  cowplot::theme_cowplot() +
  scale_color_manual(values = unname(pals::polychrome())) +
  scale_x_discrete(guide = guide_axis(angle = 90))
# Print the R version, platform details and package versions of this session.
devtools::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#> setting value
#> version R version 4.3.0 (2023-04-21)
#> os macOS Ventura 13.6.6
#> system aarch64, darwin20
#> ui X11
#> language (EN)
#> collate en_US.UTF-8
#> ctype en_US.UTF-8
#> tz America/New_York
#> date 2024-05-16
#> pandoc 3.1.11 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/aarch64/ (via rmarkdown)
#>
#> ─ Packages ───────────────────────────────────────────────────────────────────
#> package * version date (UTC) lib source
#> abind 1.4-5 2016-07-21 [1] CRAN (R 4.3.0)
#> AnnotationDbi 1.61.2 2023-03-24 [1] Bioconductor
#> beachmat 2.15.0 2022-12-20 [1] Bioconductor
#> beeswarm 0.4.0 2021-06-01 [1] CRAN (R 4.3.0)
#> Biobase 2.60.0 2023-04-25 [1] Bioconductor
#> BiocGenerics 0.46.0 2023-04-25 [1] Bioconductor
#> BiocNeighbors 1.17.1 2022-12-20 [1] Bioconductor
#> BiocParallel 1.33.11 2023-03-24 [1] Bioconductor
#> BiocSingular 1.15.0 2022-12-20 [1] Bioconductor
#> Biostrings 2.67.2 2023-04-19 [1] Bioconductor
#> bit 4.0.5 2022-11-15 [1] CRAN (R 4.3.0)
#> bit64 4.0.5 2020-08-30 [1] CRAN (R 4.3.0)
#> bitops 1.0-7 2021-04-24 [1] CRAN (R 4.3.0)
#> blob 1.2.4 2023-03-17 [1] CRAN (R 4.3.0)
#> bluster 1.9.1 2023-01-13 [1] Bioconductor
#> bslib 0.6.1 2023-11-28 [1] CRAN (R 4.3.1)
#> cachem 1.0.8 2023-05-01 [1] CRAN (R 4.3.0)
#> callr 3.7.3 2022-11-02 [1] CRAN (R 4.3.0)
#> cli 3.6.1 2023-03-23 [1] CRAN (R 4.3.0)
#> cluster 2.1.4 2022-08-22 [1] CRAN (R 4.3.0)
#> codetools 0.2-19 2023-02-01 [1] CRAN (R 4.3.0)
#> colorspace 2.1-0 2023-01-23 [1] CRAN (R 4.3.0)
#> cowplot 1.1.1 2020-12-30 [1] CRAN (R 4.3.0)
#> crayon 1.5.2 2022-09-29 [1] CRAN (R 4.3.0)
#> data.table 1.14.8 2023-02-17 [1] CRAN (R 4.3.0)
#> DBI 1.1.3 2022-06-18 [1] CRAN (R 4.3.0)
#> DelayedArray 0.26.7 2023-07-30 [1] Bioconductor
#> DelayedMatrixStats 1.21.0 2022-12-20 [1] Bioconductor
#> devtools 2.4.5 2022-10-11 [1] CRAN (R 4.3.0)
#> dichromat 2.0-0.1 2022-05-02 [1] CRAN (R 4.3.0)
#> digest 0.6.33 2023-07-07 [1] CRAN (R 4.3.0)
#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)
#> dqrng 0.3.0 2021-05-01 [1] CRAN (R 4.3.0)
#> edgeR 3.42.2 2023-05-02 [1] Bioconductor
#> ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.3.0)
#> evaluate 0.21 2023-05-05 [1] CRAN (R 4.3.0)
#> fansi 1.0.4 2023-01-22 [1] CRAN (R 4.3.0)
#> farver 2.1.1 2022-07-06 [1] CRAN (R 4.3.0)
#> fastmap 1.1.1 2023-02-24 [1] CRAN (R 4.3.0)
#> forcats * 1.0.0 2023-01-29 [1] CRAN (R 4.3.0)
#> fs 1.6.3 2023-07-20 [1] CRAN (R 4.3.0)
#> generics 0.1.3 2022-07-05 [1] CRAN (R 4.3.0)
#> GenomeInfoDb 1.36.1 2023-07-02 [1] Bioconductor
#> GenomeInfoDbData 1.2.10 2023-05-08 [1] Bioconductor
#> GenomicRanges 1.52.0 2023-04-25 [1] Bioconductor
#> ggbeeswarm 0.7.2 2023-04-29 [1] CRAN (R 4.3.0)
#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)
#> glue 1.6.2 2022-02-24 [1] CRAN (R 4.3.0)
#> gtable 0.3.3 2023-03-21 [1] CRAN (R 4.3.0)
#> highr 0.10 2022-12-22 [1] CRAN (R 4.3.0)
#> hms 1.1.3 2023-03-21 [1] CRAN (R 4.3.0)
#> htmltools 0.5.7 2023-11-03 [1] CRAN (R 4.3.1)
#> htmlwidgets 1.6.2 2023-03-17 [1] CRAN (R 4.3.0)
#> httpuv 1.6.11 2023-05-11 [1] CRAN (R 4.3.0)
#> httr 1.4.6 2023-05-08 [1] CRAN (R 4.3.0)
#> igraph 1.4.3 2023-05-22 [1] CRAN (R 4.3.0)
#> IRanges 2.34.1 2023-07-02 [1] Bioconductor
#> irlba 2.3.5.1 2022-10-03 [1] CRAN (R 4.3.0)
#> jquerylib 0.1.4 2021-04-26 [1] CRAN (R 4.3.0)
#> jsonlite 1.8.7 2023-06-29 [1] CRAN (R 4.3.0)
#> KEGGREST 1.39.0 2022-12-20 [1] Bioconductor
#> knitr 1.43 2023-05-25 [1] CRAN (R 4.3.0)
#> labeling 0.4.2 2020-10-20 [1] CRAN (R 4.3.0)
#> later 1.3.1 2023-05-02 [1] CRAN (R 4.3.0)
#> lattice 0.21-8 2023-04-05 [1] CRAN (R 4.3.0)
#> lifecycle 1.0.3 2022-10-07 [1] CRAN (R 4.3.0)
#> limma 3.56.1 2023-05-07 [1] Bioconductor
#> locfit 1.5-9.7 2023-01-02 [1] CRAN (R 4.3.0)
#> lubridate * 1.9.2 2023-02-10 [1] CRAN (R 4.3.0)
#> magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.3.0)
#> mapproj 1.2.11 2023-01-12 [1] CRAN (R 4.3.0)
#> maps 3.4.1 2022-10-30 [1] CRAN (R 4.3.0)
#> Matrix 1.6-5 2024-01-11 [1] CRAN (R 4.3.1)
#> MatrixGenerics 1.12.3 2023-07-30 [1] Bioconductor
#> matrixStats 1.0.0 2023-06-02 [1] CRAN (R 4.3.0)
#> memoise 2.0.1 2021-11-26 [1] CRAN (R 4.3.0)
#> metamoRph 0.2.2 2023-12-06 [1] local
#> metapod 1.7.0 2022-12-20 [1] Bioconductor
#> mime 0.12 2021-09-28 [1] CRAN (R 4.3.0)
#> miniUI 0.1.1.1 2018-05-18 [1] CRAN (R 4.3.0)
#> munsell 0.5.0 2018-06-12 [1] CRAN (R 4.3.0)
#> org.Hs.eg.db 3.17.0 2023-08-23 [1] Bioconductor
#> pals 1.8 2023-08-23 [1] CRAN (R 4.3.0)
#> pillar 1.9.0 2023-03-22 [1] CRAN (R 4.3.0)
#> pkgbuild 1.4.0 2022-11-27 [1] CRAN (R 4.3.0)
#> pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.3.0)
#> pkgload 1.3.3 2023-09-22 [1] CRAN (R 4.3.1)
#> png 0.1-8 2022-11-29 [1] CRAN (R 4.3.0)
#> prettyunits 1.1.1 2020-01-24 [1] CRAN (R 4.3.0)
#> processx 3.8.3 2023-12-10 [1] CRAN (R 4.3.1)
#> profvis 0.3.8 2023-05-02 [1] CRAN (R 4.3.0)
#> promises 1.2.0.1 2021-02-11 [1] CRAN (R 4.3.0)
#> ps 1.7.5 2023-04-18 [1] CRAN (R 4.3.0)
#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)
#> R.methodsS3 1.8.2 2022-06-13 [1] CRAN (R 4.3.0)
#> R.oo 1.25.0 2022-06-12 [1] CRAN (R 4.3.0)
#> R.utils 2.12.2 2022-11-11 [1] CRAN (R 4.3.0)
#> R6 2.5.1 2021-08-19 [1] CRAN (R 4.3.0)
#> Rcpp 1.0.11 2023-07-06 [1] CRAN (R 4.3.0)
#> RCurl 1.98-1.12 2023-03-27 [1] CRAN (R 4.3.0)
#> readr * 2.1.4 2023-02-10 [1] CRAN (R 4.3.0)
#> remotes 2.4.2 2021-11-30 [1] CRAN (R 4.3.0)
#> rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)
#> rmarkdown 2.23 2023-07-01 [1] CRAN (R 4.3.0)
#> RSQLite 2.3.1 2023-04-03 [1] CRAN (R 4.3.0)
#> rstudioapi 0.14 2022-08-22 [1] CRAN (R 4.3.0)
#> rsvd 1.0.5 2021-04-16 [1] CRAN (R 4.3.0)
#> S4Arrays 1.2.0 2023-10-26 [1] Bioconductor
#> S4Vectors 0.38.1 2023-05-02 [1] Bioconductor
#> sass 0.4.7 2023-07-15 [1] CRAN (R 4.3.0)
#> ScaledMatrix 1.8.1 2023-05-03 [1] Bioconductor
#> scales 1.2.1 2022-08-20 [1] CRAN (R 4.3.0)
#> scran 1.27.1 2022-12-20 [1] Bioconductor
#> scuttle 1.9.4 2023-01-23 [1] Bioconductor
#> sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 4.3.0)
#> shiny 1.8.0 2023-11-17 [1] CRAN (R 4.3.1)
#> SingleCellExperiment 1.22.0 2023-04-25 [1] Bioconductor
#> sparseMatrixStats 1.11.1 2022-12-30 [1] Bioconductor
#> statmod 1.5.0 2023-01-06 [1] CRAN (R 4.3.0)
#> stringi 1.7.12 2023-01-11 [1] CRAN (R 4.3.0)
#> stringr * 1.5.0 2022-12-02 [1] CRAN (R 4.3.0)
#> SummarizedExperiment 1.30.2 2023-06-11 [1] Bioconductor
#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)
#> tidyr * 1.3.0 2023-01-24 [1] CRAN (R 4.3.0)
#> tidyselect 1.2.0 2022-10-10 [1] CRAN (R 4.3.0)
#> tidyverse * 2.0.0 2023-02-22 [1] CRAN (R 4.3.0)
#> timechange 0.2.0 2023-01-11 [1] CRAN (R 4.3.0)
#> tzdb 0.4.0 2023-05-12 [1] CRAN (R 4.3.0)
#> urlchecker 1.0.1 2021-11-30 [1] CRAN (R 4.3.0)
#> usethis 2.1.6 2022-05-25 [1] CRAN (R 4.3.0)
#> utf8 1.2.3 2023-01-31 [1] CRAN (R 4.3.0)
#> vctrs 0.6.3 2023-06-14 [1] CRAN (R 4.3.0)
#> vipor 0.4.5 2017-03-22 [1] CRAN (R 4.3.0)
#> viridisLite 0.4.2 2023-05-02 [1] CRAN (R 4.3.0)
#> withr 2.5.0 2022-03-03 [1] CRAN (R 4.3.0)
#> xfun 0.42 2024-02-08 [1] CRAN (R 4.3.1)
#> xtable 1.8-4 2019-04-21 [1] CRAN (R 4.3.0)
#> XVector 0.40.0 2023-04-25 [1] Bioconductor
#> yaml 2.3.7 2023-01-23 [1] CRAN (R 4.3.0)
#> zlibbioc 1.46.0 2023-04-25 [1] Bioconductor
#>
#> [1] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library
#>
#> ──────────────────────────────────────────────────────────────────────────────